Björn Reineking, 2022-09-13
#' ## Preamble - Movement is about solving the problem of life - Trajectory analysis is about finding the problem
We will use the buffalo data set.
library(ggplot2)
library(RStoolbox)
library(animove)
library(survival)
library(MASS)
library(lubridate)
library(dplyr)
library(nlme)
library(pbs)
library(circular)
library(CircStats)
library(amt)
library(ctmm)
library(move)
See Kami’s slides for how we got here…
data(buffalo_utm)
Currently, there is no conversion function from move::moveStack to amt::track implemented, so we do it by hand
# Convert the move::moveStack to an amt track by hand:
# flatten to a data.frame, rename the id/coordinate/time columns,
# then build the track, keeping the original projection.
buffalo_tracks <- as.data.frame(buffalo_utm) %>%
select(id = individual.local.identifier, x = coords.x1, y = coords.x2, ts = timestamp) %>%
make_track(x, y, ts, id, crs = projection(buffalo_utm))
summary(buffalo_tracks)
## x_ y_ t_
## Min. :356761 Min. :7216465 Min. :2005-02-17 05:05:00
## 1st Qu.:375417 1st Qu.:7234241 1st Qu.:2005-08-22 06:36:15
## Median :379045 Median :7290006 Median :2005-10-14 02:38:30
## Mean :380564 Mean :7275531 Mean :2005-10-21 03:03:24
## 3rd Qu.:386182 3rd Qu.:7310209 3rd Qu.:2005-12-14 18:32:15
## Max. :397937 Max. :7338708 Max. :2006-12-31 14:34:00
## id
## Length:24662
## Class :character
## Mode :character
##
##
##
data(buffalo_env)
We can plot the buffalo tracks, but they do not have a plot method, so we need to give a bit more information to the points() function.
raster::plot(raster(buffalo_env, 1))
points(y_ ~ x_, data = buffalo_tracks, col = factor(buffalo_tracks$id))
To speed up analyses, we will only work with one individual, Cilla
cilla <- filter(buffalo_tracks, id == "Cilla")
hist(step_lengths(cilla))
which(step_lengths(cilla)>5000)
## [1] 1
The very first step is unusually long; let us plot the first day in red on top of the full trajectory.
plot(cilla, type = "l")
lines(filter(cilla, t_ < min(t_) + days(1)), col = "red")
Let us exclude the full first day
cilla <- filter(cilla, t_ > min(t_) + days(1))
It is a good idea to perform the analysis at several temporal scales, i.e. different step durations.
The initial sampling rate of cilla is about 1 hour:
summarize_sampling_rate(cilla)
## # A tibble: 1 × 9
## min q1 median mean q3 max sd n unit
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 0.05 1 1 0.998 1 2 0.0516 3503 hour
SSF assumes that the velocities are not autocorrelated. So we cannot simply do the analysis at the highest temporal resolution of the data. We could try to use the downweighting trick that we use for the RSF, but for the SSF, the time between successive steps will also affect the point estimates of the parameters, so the problem is a bit more tricky. As a rule-of-thumb, I would downsample the data such that the autocorrelation in velocities has decayed to something between 1% and 2%. Say you had a sampling of 1 hour, and the SSF should be done at 3 hours given this rule of thumb, then you could still use all data, by constructing 3 step data sets, each starting one hour apart, and give each a weight of 1/3 in the likelihood.
library(ctmm)
# Convert the track to a ctmm telemetry object and look at the empirical
# variogram over the first 10 hours (xlim is in seconds).
cilla_telemetry <- as_telemetry(cilla)
plot(variogram(cilla_telemetry), xlim = c(0, 10 * 3600))
# Fit a continuous-time movement model; FIT$tau["velocity"] is the fitted
# velocity autocorrelation timescale (seconds).
GUESS <- ctmm.guess(cilla_telemetry, interactive=FALSE)
FIT <- ctmm.fit(cilla_telemetry, GUESS)
plot(variogram(cilla_telemetry), xlim = c(0, 10 * 3600))
# Mark the lags (in hours) at which velocity autocorrelation has decayed
# to 1% and 2%: solving exp(-t / tau) = p gives t = -tau * log(p).
abline(v = FIT$tau["velocity"] * -log(0.01) / 3600, col = "blue")
abline(v = FIT$tau["velocity"] * -log(0.02) / 3600, col = "red")
legend("bottomright", lty = 1, col = c("blue", "red"), legend = c("1%", "2%"), title = "Velocity\nautocorrelation",
bty = "n")
Now we resample to 3 hour intervals, with a tolerance of 15 minutes
step_duration <- 3
cilla <- track_resample(cilla, hours(step_duration), tolerance = minutes(15))
Look at the new sampling rate
summarize_sampling_rate(cilla)
## # A tibble: 1 × 9
## min q1 median mean q3 max sd n unit
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 2.95 3 3 3.00 3 4 0.0308 1165 hour
So there are at least two observations that are more than 3 hours 15 minutes apart, so there should be at least two bursts:
table(cilla$burst_)
##
## 1 2
## 322 844
If there are bursts, we may want to filter bursts with very few locations. For example, to calculate a turning angle, we need at least three locations. So we often will want to keep only bursts with at least 3 observations:
cilla <- filter_min_n_burst(cilla, 3)
Convert locations to steps. We will have fewer rows in the step data frame than in the track data frame because the final position is not a complete step.
ssf_cilla <- steps_by_burst(cilla)
We still have steps without a turning angle (the first step in a burst)
which(is.na(ssf_cilla$ta_))
## [1] 1 322
# Drop the steps without a turning angle (the first step of each burst).
ssf_cilla <- filter(ssf_cilla, !is.na(ta_))
# Empirical distributions of step lengths and turning angles, side by side.
par(mfrow = c(1, 2))
hist(ssf_cilla$sl_, breaks = 20, main = "",
xlab = "Distance (m)")
hist(ssf_cilla$ta_, main="",breaks = seq(-pi, pi, len=11),
xlab="Relative angle (radians)")
# Fit two candidate step-length distributions: exponential (MASS::fitdistr)
# and gamma (amt::fit_distr).
fexp <- fitdistr(ssf_cilla$sl_, "exponential")
fgamma <- amt::fit_distr(ssf_cilla$sl_, "gamma")
# Overlay the two fitted densities on the step-length histogram.
par(mfrow = c(1, 1))
hist(ssf_cilla$sl_, breaks = 50, prob = TRUE,
xlim = c(0, 8000), ylim = c(0, 2e-3),
xlab = "Step length (m)", main = "")
plot(function(x) dexp(x, rate = fexp$estimate), add = TRUE, from = 0.1, to = 8000, col = "red")
plot(function(x) dgamma(x, shape = fgamma$params$shape,
scale = fgamma$params$scale), add = TRUE, from = 0.1, to = 8000, col = "blue")
legend("topright", col = c("red", "blue"), lty = 1,
legend = c("exponential", "gamma"), bty = "n")
# Fit a von Mises distribution to the turning angles and overlay its
# density (mu fixed at 0 for the overlay).
fvmises <- fit_distr(ssf_cilla$ta_, "vonmises")
par(mfrow = c(1, 1))
hist(ssf_cilla$ta_, breaks = 50, prob = TRUE,
xlim = c(-pi, pi),
xlab = "Turning angles (rad)", main = "")
plot(function(x) dvonmises(x, mu = 0, kappa = fvmises$params$kappa), add = TRUE, from = -pi, to = pi, col = "red")
## Warning in as.circular(x): an object is coerced to the class 'circular' using default value for the following components:
## type: 'angles'
## units: 'radians'
## template: 'none'
## modulo: 'asis'
## zero: 0
## rotation: 'counter'
## conversion.circularxradians0counter
## Warning in as.circular(x): an object is coerced to the class 'circular' using default value for the following components:
## type: 'angles'
## units: 'radians'
## template: 'none'
## modulo: 'asis'
## zero: 0
## rotation: 'counter'
## conversion.circularmuradians0counter
Create random steps. We typically get a warning that “Step-lengths or turning angles contained NA, which were removed”, because of the missing turning angles at the start of a burst.
# Fix the RNG so the random steps are reproducible.
set.seed(2)
# Rebuild the steps and add 200 random (control) steps per observed step;
# see the amt::random_steps() documentation for the sampling distributions.
ssf_cilla <- steps_by_burst(cilla)
ssf_cilla <- random_steps(ssf_cilla, n_control = 200)
# Visualise one stratum: all end points of step my_step_id (case and
# controls), plus the observed (case_ == 1) end points of that step and
# the two preceding steps, drawn larger.
my_step_id <- 3
ggplot(data = filter(ssf_cilla, step_id_ == my_step_id | (step_id_ %in% c(my_step_id - 1, my_step_id - 2) & case_ == 1)),
aes(x = x2_, y = y2_)) + geom_point(aes(color = factor(step_id_))) + geom_point(data = filter(ssf_cilla, step_id_ %in% c(my_step_id, my_step_id - 1, my_step_id - 2) & case_ == 1), aes(x = x2_, y = y2_, color = factor(step_id_), size = 2))
I recommend to always use the option “both”, which provides the environmental conditions at the start and the end of the step. The condition at the end are what we use for selection, and the conditions at the start can be used to modify e.g. turning angles and step length.
ssf_cilla <- extract_covariates(ssf_cilla, buffalo_env, where = "both")
Adding hour modelling diurnal variation in step lengths, turning angles, and preference for environmental conditions
ssf_cilla <- mutate(ssf_cilla, "hour" = hour(t1_) + minute(t1_) / 60)
Remove NA’s
ssf_cilla <- ssf_cilla[complete.cases(ssf_cilla),]
m_1 <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + slope_end + elev_end +
mean_NDVI_end + var_NDVI_end + strata(step_id_))
summary(m_1)
## Call:
## coxph(formula = Surv(rep(1, 233762L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + slope_end + elev_end + mean_NDVI_end + var_NDVI_end +
## strata(step_id_), data = data, method = "exact")
##
## n= 233762, number of events= 1162
##
## coef exp(coef) se(coef) z Pr(>|z|)
## cos(ta_) 1.749e-02 1.018e+00 4.691e-02 0.373 0.70926
## sl_ 6.105e-06 1.000e+00 6.204e-05 0.098 0.92161
## log(sl_) 1.010e-02 1.010e+00 3.350e-02 0.302 0.76296
## slope_end -8.711e-03 9.913e-01 2.151e-02 -0.405 0.68543
## elev_end -1.586e-02 9.843e-01 3.226e-03 -4.915 8.86e-07 ***
## mean_NDVI_end 5.646e+00 2.832e+02 2.137e+00 2.642 0.00825 **
## var_NDVI_end 2.914e-01 1.338e+00 9.797e+00 0.030 0.97627
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## cos(ta_) 1.0176 0.982661 9.282e-01 1.116e+00
## sl_ 1.0000 0.999994 9.999e-01 1.000e+00
## log(sl_) 1.0102 0.989947 9.460e-01 1.079e+00
## slope_end 0.9913 1.008750 9.504e-01 1.034e+00
## elev_end 0.9843 1.015983 9.781e-01 9.905e-01
## mean_NDVI_end 283.2445 0.003531 4.293e+00 1.869e+04
## var_NDVI_end 1.3383 0.747194 6.129e-09 2.923e+08
##
## Concordance= 0.546 (se = 0.008 )
## Likelihood ratio test= 64.17 on 7 df, p=2e-11
## Wald test = 55.28 on 7 df, p=1e-09
## Score (logrank) test = 55.57 on 7 df, p=1e-09
"In statistics, multicollinearity (also collinearity) is a phenomenon in which one predictor variable in a multiple regression model can be linearly predicted from the others with a substantial degree of accuracy. In this situation the coefficient estimates of the multiple regression may change erratically in response to small changes in the model or the data." Wikipedia, accessed 29.08.2017
One way of dealing with collinearity is to select a subset of variables that is sufficiently uncorrelated Dormann et al. 2013. Here we simply look at pairwise correlation between predictors.
round(cor(ssf_cilla[, c("slope_end", "elev_end",
"water_dist_end", "mean_NDVI_end", "var_NDVI_end")]), 2)
## slope_end elev_end water_dist_end mean_NDVI_end var_NDVI_end
## slope_end 1.00 0.37 0.09 0.21 0.10
## elev_end 0.37 1.00 0.77 -0.15 0.57
## water_dist_end 0.09 0.77 1.00 -0.44 0.67
## mean_NDVI_end 0.21 -0.15 -0.44 1.00 -0.40
## var_NDVI_end 0.10 0.57 0.67 -0.40 1.00
elev and water_dist are positively correlated > 0.7 ## Which collinear variable to pick? - The one that is more relevant - The one that is by itself a better predictor
m1_water <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + water_dist_end + strata(step_id_))
m1_elev <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + elev_end + strata(step_id_))
AIC(m1_water$model)
## [1] 12332.29
AIC(m1_elev$model)
## [1] 12275.91
So we pick elev, because it by itself explains the movement better. ## Fit step selection function
m_1 <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + slope_end + elev_end +
mean_NDVI_end + var_NDVI_end + strata(step_id_))
summary(m_1)
## Call:
## coxph(formula = Surv(rep(1, 233762L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + slope_end + elev_end + mean_NDVI_end + var_NDVI_end +
## strata(step_id_), data = data, method = "exact")
##
## n= 233762, number of events= 1162
##
## coef exp(coef) se(coef) z Pr(>|z|)
## cos(ta_) 1.749e-02 1.018e+00 4.691e-02 0.373 0.70926
## sl_ 6.105e-06 1.000e+00 6.204e-05 0.098 0.92161
## log(sl_) 1.010e-02 1.010e+00 3.350e-02 0.302 0.76296
## slope_end -8.711e-03 9.913e-01 2.151e-02 -0.405 0.68543
## elev_end -1.586e-02 9.843e-01 3.226e-03 -4.915 8.86e-07 ***
## mean_NDVI_end 5.646e+00 2.832e+02 2.137e+00 2.642 0.00825 **
## var_NDVI_end 2.914e-01 1.338e+00 9.797e+00 0.030 0.97627
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## cos(ta_) 1.0176 0.982661 9.282e-01 1.116e+00
## sl_ 1.0000 0.999994 9.999e-01 1.000e+00
## log(sl_) 1.0102 0.989947 9.460e-01 1.079e+00
## slope_end 0.9913 1.008750 9.504e-01 1.034e+00
## elev_end 0.9843 1.015983 9.781e-01 9.905e-01
## mean_NDVI_end 283.2445 0.003531 4.293e+00 1.869e+04
## var_NDVI_end 1.3383 0.747194 6.129e-09 2.923e+08
##
## Concordance= 0.546 (se = 0.008 )
## Likelihood ratio test= 64.17 on 7 df, p=2e-11
## Wald test = 55.28 on 7 df, p=1e-09
## Score (logrank) test = 55.57 on 7 df, p=1e-09
slope and var_NDVI do not contribute significantly to the fit ## Model selection Model selection is a vast topic. I recommend using only few models with ecological justification, rather than searching for the “best” model in a huge model space. Here we just use stepwise backward selection based on AIC
m_2 <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + slope_end + elev_end + mean_NDVI_end + strata(step_id_))
AIC(m_1$model)
## [1] 12274.71
AIC(m_2$model)
## [1] 12272.71
summary(m_2)
## Call:
## coxph(formula = Surv(rep(1, 233762L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + slope_end + elev_end + mean_NDVI_end + strata(step_id_),
## data = data, method = "exact")
##
## n= 233762, number of events= 1162
##
## coef exp(coef) se(coef) z Pr(>|z|)
## cos(ta_) 1.745e-02 1.018e+00 4.689e-02 0.372 0.70983
## sl_ 6.143e-06 1.000e+00 6.203e-05 0.099 0.92110
## log(sl_) 1.010e-02 1.010e+00 3.350e-02 0.301 0.76315
## slope_end -8.805e-03 9.912e-01 2.127e-02 -0.414 0.67892
## elev_end -1.582e-02 9.843e-01 2.959e-03 -5.346 9.01e-08 ***
## mean_NDVI_end 5.642e+00 2.822e+02 2.133e+00 2.645 0.00817 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## cos(ta_) 1.0176 0.982705 0.9283 1.116
## sl_ 1.0000 0.999994 0.9999 1.000
## log(sl_) 1.0101 0.989955 0.9460 1.079
## slope_end 0.9912 1.008844 0.9508 1.033
## elev_end 0.9843 1.015945 0.9786 0.990
## mean_NDVI_end 282.1561 0.003544 4.3119 18463.284
##
## Concordance= 0.546 (se = 0.008 )
## Likelihood ratio test= 64.17 on 6 df, p=6e-12
## Wald test = 55.32 on 6 df, p=4e-10
## Score (logrank) test = 55.14 on 6 df, p=4e-10
m_3 <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + elev_end + mean_NDVI_end + strata(step_id_))
AIC(m_3$model)
## [1] 12270.89
summary(m_3)
## Call:
## coxph(formula = Surv(rep(1, 233762L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + elev_end + mean_NDVI_end + strata(step_id_), data = data,
## method = "exact")
##
## n= 233762, number of events= 1162
##
## coef exp(coef) se(coef) z Pr(>|z|)
## cos(ta_) 1.715e-02 1.017e+00 4.688e-02 0.366 0.71456
## sl_ 5.654e-06 1.000e+00 6.202e-05 0.091 0.92737
## log(sl_) 1.017e-02 1.010e+00 3.350e-02 0.304 0.76139
## elev_end -1.644e-02 9.837e-01 2.572e-03 -6.392 1.64e-10 ***
## mean_NDVI_end 5.344e+00 2.093e+02 2.009e+00 2.659 0.00783 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## cos(ta_) 1.0173 0.983000 0.9280 1.115e+00
## sl_ 1.0000 0.999994 0.9999 1.000e+00
## log(sl_) 1.0102 0.989879 0.9460 1.079e+00
## elev_end 0.9837 1.016572 0.9788 9.887e-01
## mean_NDVI_end 209.3137 0.004778 4.0768 1.075e+04
##
## Concordance= 0.545 (se = 0.008 )
## Likelihood ratio test= 63.99 on 5 df, p=2e-12
## Wald test = 55.23 on 5 df, p=1e-10
## Score (logrank) test = 54.66 on 5 df, p=2e-10
Forester et al. 2009 Ecology 90:3554–3565. Calculate deviance residuals for each stratum (i.e., the sum of the residuals for the case and all associated controls).
# Sum the deviance residuals of a fitted SSF within each stratum (all rows
# sharing the same step start time t1_), following Forester et al. 2009.
# Returns a tibble with columns time, residuals, and a constant `group`
# column so the result can be fed to nlme::lme().
ssf_residuals <- function(m, data) {
  per_row <- data.frame("time" = data$t1_,
                        "residuals" = residuals(m$model, type = "deviance"))
  per_stratum <- tibble::as_tibble(per_row) %>%
    dplyr::group_by(time) %>%
    dplyr::summarise(residuals = sum(residuals))
  per_stratum$group <- 1
  per_stratum
}
resid_df <- ssf_residuals(m_3, ssf_cilla)
Fit an intercept-only mixed-effects model using lme() from the nlme package.
rm1 <- lme(residuals ~ 1, random = ~ 1 | group,
data = resid_df)
plot(ACF(rm1, maxLag = 40), alpha = 0.05)
So we see that there is some significant autocorrelation at lag 1.
One effect of residual temporal autocorrelation is too extreme p-values, but it may also cause bias in parameter estimates. Forester et al. 2009 suggest a way to estimate more appropriate confidence intervals and p-values. ## Model evaluation - R2 is low. Always is. - Not yet clear what a good performance index would be. Perhaps https://github.com/aaarchmiller/uhcplots will help. - Cross-validation - split steps in e.g. 5 folds (long stretches better - should be long enough so that autocorrelation in residuals has tapered off) - leave each fold out, refit and predict to left-out fold
Caveat: Note that this habitat preference in general will not match the utilisation distribution of the animal (i.e. how much time it spends where). See below for more information. The raster prediction function assumes that all environmental layers are represented in one raster stack
We need to exclude the parameters that are not related to the environment, i.e. the first three parameters related to turning angles and step length
coef(m_1)
## cos(ta_) sl_ log(sl_) slope_end elev_end
## 1.749117e-02 6.105408e-06 1.010394e-02 -8.711476e-03 -1.585708e-02
## mean_NDVI_end var_NDVI_end
## 5.646310e+00 2.914306e-01
# Strip the "_end" suffix from coefficient names so they match the layer
# names of the environmental raster stack (e.g. "elev_end" -> "elev").
# The pattern is anchored with `$` so that "_end" occurring in the middle
# of a name (e.g. "x_end_y") is left untouched; the unanchored original
# would have removed every occurrence.
remove_end <- function(x) {
  names(x) <- gsub("_end$", "", names(x))
  x
}
habitat_map <- habitat_kernel(remove_end(coef(m_1)[-(1:3)]), buffalo_env, exp = FALSE)
ggp <- ggR(habitat_map, geom_raster = TRUE) +
scale_fill_gradient(low = "lightgray", high = "black")
ggp + geom_path(data = ssf_cilla, aes(x = x1_, y = y1_))
Now zoom in
ggp <- ggR(crop(habitat_map, extent(as_sp(cilla)) + 5000), geom_raster = TRUE) +
scale_fill_gradient(low = "lightgray", high = "black")
ggp + geom_path(data = ssf_cilla, aes(x = x1_, y = y1_, color = factor(burst_)))
The model is strongly driven by elevation
ggp <- ggR(crop(buffalo_env, extent(as_sp(cilla)) + 5000), layer = "elev", geom_raster = TRUE) +
scale_fill_gradient(low = "lightgray", high = "black")
ggp + geom_path(data = ssf_cilla, aes(x = x1_, y = y1_, color = factor(burst_)))
Here: a model with time-varying preference for mean_NDVI We group observation in 3 hour bins to smooth the picture
boxplot(mean_NDVI_end ~ I(floor(hour/3)*3), data =
filter(ssf_cilla, case_ == 1),xlab = "Time of day",
ylab = "mean NDVI")
We can do the same for other variables, including those of the “movement kernel”, e.g. distance
boxplot(sl_ ~ floor(hour), data =
filter(ssf_cilla, case_ == 1), xlab = "Time of day",
ylab = "dist")
What behavioural rhythm do these figures suggest?
m_time_ndvi <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + elev_end + mean_NDVI_end +
mean_NDVI_end:pbs(hour, df = 5, Boundary.knots = c(0,24)) + strata(step_id_))
m_time_dist <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) + elev_end + mean_NDVI_end +
sl_:pbs(hour, df = 5, Boundary.knots = c(0,24)) + strata(step_id_))
# Pull out the stratum identifier of the first observation from a fitted
# clogit/coxph model: the "special" attribute of the model terms records
# where the strata() term appears.
extract_stratum <- function(object) {
  specials <- attr(object$terms, "special")
  specials$strata[1]
}
stratum <- extract_stratum(m_time_ndvi$model)
pred_data_ndvi <- data.frame("step_id_" = stratum, ta_ = 0, sl_ = 1, elev_end = 0, mean_NDVI_end = 1, hour = seq(0, 24, len = 101))
m_time_ndvi <- clogit(case_ ~ cos(ta_) + sl_ + log(sl_) + elev_end + mean_NDVI_end +
mean_NDVI_end:pbs(hour, df = 5, Boundary.knots = c(0,24)) + strata(step_id_), data = ssf_cilla)
pred_time <- survival:::predict.coxph(m_time_ndvi, newdata = pred_data_ndvi, se.fit = TRUE)
upper <- pred_time$fit + 1.96 * pred_time$se.fit
lower <- pred_time$fit - 1.96 * pred_time$se.fit
par(mfrow = c(1, 1))
plot(pred_data_ndvi$hour, pred_time$fit, type = "l",
ylim = range(c(upper, lower)), xlab = "Time of day",
ylab = "Preference mean_NDVI")
lines(pred_data_ndvi$hour, upper, lty = 2)
lines(pred_data_ndvi$hour, lower, lty = 2)
abline(h = 0, lty = 3)
set.seed(2)
k1 <- amt:::redistribution_kernel(m_3, map = buffalo_env, start = amt:::make_start.steps_xyt(ssf_cilla[1, ]),
stochastic = TRUE, tolerance.outside = 0.2, as.raster = FALSE,
n.control = 1e3)
s1 <- amt:::simulate_path.redistribution_kernel(k1, n.steps = 500)
# Bounding box (raster::extent) covering two tracks: stack the coordinate
# columns of both, drop rows with missing values, and take the x/y ranges.
extent_tracks <- function(x, y) {
  xy <- rbind(x[, c("x_", "y_")], y[, c("x_", "y_")])
  xy <- data.frame(na.omit(xy))
  raster::extent(c(range(xy$x_), range(xy$y_)))
}
elev_crop <- crop(buffalo_env[["elev"]], extent_tracks(s1, cilla) + 2000)
raster::plot(elev_crop)
lines(cilla)
lines(s1$x_, s1$y_, col = "red")
m_hr <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) +
elev_end + water_dist_end + x2_ + y2_ + I(x2_^2 + y2_^2) + strata(step_id_))
summary(m_hr)
## Call:
## coxph(formula = Surv(rep(1, 233762L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + elev_end + water_dist_end + x2_ + y2_ + I(x2_^2 +
## y2_^2) + strata(step_id_), data = data, method = "exact")
##
## n= 233762, number of events= 1162
##
## coef exp(coef) se(coef) z Pr(>|z|)
## cos(ta_) 2.994e-02 1.030e+00 4.706e-02 0.636 0.525
## sl_ 3.445e-05 1.000e+00 6.211e-05 0.555 0.579
## log(sl_) 7.742e-03 1.008e+00 3.348e-02 0.231 0.817
## elev_end -2.144e-02 9.788e-01 3.080e-03 -6.961 3.38e-12 ***
## water_dist_end -1.382e-04 9.999e-01 1.349e-04 -1.024 0.306
## x2_ 1.301e-02 1.013e+00 2.577e-03 5.046 4.51e-07 ***
## y2_ 2.401e-01 1.271e+00 4.805e-02 4.997 5.83e-07 ***
## I(x2_^2 + y2_^2) -1.658e-08 1.000e+00 3.322e-09 -4.992 5.97e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## cos(ta_) 1.0304 0.9705 0.9396 1.1300
## sl_ 1.0000 1.0000 0.9999 1.0002
## log(sl_) 1.0078 0.9923 0.9438 1.0761
## elev_end 0.9788 1.0217 0.9729 0.9847
## water_dist_end 0.9999 1.0001 0.9996 1.0001
## x2_ 1.0131 0.9871 1.0080 1.0182
## y2_ 1.2714 0.7866 1.1571 1.3969
## I(x2_^2 + y2_^2) 1.0000 1.0000 1.0000 1.0000
##
## Concordance= 0.552 (se = 0.008 )
## Likelihood ratio test= 91.57 on 8 df, p=2e-16
## Wald test = 73.65 on 8 df, p=9e-13
## Score (logrank) test = 70.4 on 8 df, p=4e-12
k2 <- amt:::redistribution_kernel(m_hr, map = buffalo_env, start = amt:::make_start.steps_xyt(ssf_cilla[1, ]),
stochastic = TRUE, tolerance.outside = 0.01, as.raster = FALSE,
n.control = 1e3)
set.seed(2)
s2 <- amt:::simulate_path.redistribution_kernel(k2, n.steps = 1000)
raster::plot(crop(buffalo_env[["elev"]], extent_tracks(s2, cilla) + 2000))
lines(cilla)
lines(s2$x_, s2$y_, col = "red")
water <- buffalo_env[["water_dist"]] > 100
water <- crop(water, extent(water) - 5000)
raster::plot(water)
ww <- clump(water)
## Loading required namespace: igraph
raster::plot(ww)
ww[ww == 0] <- 2
names(ww) <- "water_crossed"
buffalo_env_2 <- raster::stack(ww, crop(buffalo_env, ww))
set.seed(2)
ssf_cilla <- cilla %>% steps_by_burst() %>% random_steps(n_control = 1000) %>%
extract_covariates(buffalo_env_2, where = "both") %>%
mutate(hour = hour(t1_) + minute(t1_) / 60) %>%
filter(complete.cases(.))
m_crossing <- fit_clogit(ssf_cilla, case_ ~ cos(ta_) + sl_ + log(sl_) +
elev_end + water_dist_end + x2_ + y2_ + I(x2_^2 + y2_^2) +
I(water_crossed_end != water_crossed_start) + strata(step_id_))
## Warning in coxexact.fit(X, Y, istrat, offset, init, control, weights =
## weights, : Loglik converged before variable 9 ; beta may be infinite.
summary(m_crossing)
## Call:
## coxph(formula = Surv(rep(1, 1126664L), case_) ~ cos(ta_) + sl_ +
## log(sl_) + elev_end + water_dist_end + x2_ + y2_ + I(x2_^2 +
## y2_^2) + I(water_crossed_end != water_crossed_start) + strata(step_id_),
## data = data, method = "exact")
##
## n= 1126664, number of events= 1119
##
## coef exp(coef)
## cos(ta_) 6.408e-02 1.066e+00
## sl_ 3.206e-05 1.000e+00
## log(sl_) 1.341e-02 1.013e+00
## elev_end -2.122e-02 9.790e-01
## water_dist_end 5.736e-04 1.001e+00
## x2_ 7.142e-03 1.007e+00
## y2_ 1.387e-01 1.149e+00
## I(x2_^2 + y2_^2) -9.613e-09 1.000e+00
## I(water_crossed_end != water_crossed_start)TRUE -1.716e+01 3.544e-08
## se(coef) z Pr(>|z|)
## cos(ta_) 4.840e-02 1.324 0.18549
## sl_ 6.442e-05 0.498 0.61869
## log(sl_) 3.408e-02 0.393 0.69397
## elev_end 3.151e-03 -6.734 1.65e-11 ***
## water_dist_end 1.812e-04 3.166 0.00155 **
## x2_ 2.887e-03 2.474 0.01337 *
## y2_ 5.345e-02 2.595 0.00947 **
## I(x2_^2 + y2_^2) 3.693e-09 -2.603 0.00924 **
## I(water_crossed_end != water_crossed_start)TRUE 7.176e+02 -0.024 0.98093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## cos(ta_) 1.066e+00 9.379e-01 0.9697
## sl_ 1.000e+00 1.000e+00 0.9999
## log(sl_) 1.013e+00 9.867e-01 0.9480
## elev_end 9.790e-01 1.021e+00 0.9730
## water_dist_end 1.001e+00 9.994e-01 1.0002
## x2_ 1.007e+00 9.929e-01 1.0015
## y2_ 1.149e+00 8.705e-01 1.0345
## I(x2_^2 + y2_^2) 1.000e+00 1.000e+00 1.0000
## I(water_crossed_end != water_crossed_start)TRUE 3.544e-08 2.822e+07 0.0000
## upper .95
## cos(ta_) 1.1723
## sl_ 1.0002
## log(sl_) 1.0835
## elev_end 0.9851
## water_dist_end 1.0009
## x2_ 1.0129
## y2_ 1.2756
## I(x2_^2 + y2_^2) 1.0000
## I(water_crossed_end != water_crossed_start)TRUE Inf
##
## Concordance= 0.562 (se = 0.008 )
## Likelihood ratio test= 124.3 on 9 df, p=<2e-16
## Wald test = 58.09 on 9 df, p=3e-09
## Score (logrank) test = 83.48 on 9 df, p=3e-14
set.seed(2)
k3 <- amt:::redistribution_kernel(m_crossing, map = stack(buffalo_env_2),
start = amt:::make_start.steps_xyt(ssf_cilla[1, ]),
stochastic = TRUE, tolerance.outside = 0.01,
as.raster = FALSE, n.control = 1e3)
s3 <- amt:::simulate_path.redistribution_kernel(k3, n.steps = 1000)
raster::plot(crop(buffalo_env[["elev"]], extent_tracks(s3, cilla) + 2000))
lines(cilla)
lines(s3$x_, s3$y_, col = "red")
There is a fast method if we have a symmetric and temporally stable jump kernel, e.g. exponential, and no effect of step angles: Barnett, A. & Moorcroft, P. (2008) Analytic steady-state space use patterns and rapid computations in mechanistic home range analysis. Journal of Mathematical Biology, 57, 139–159. The generic but computationally expensive method is to do simulations: Signer et al. (2017) Estimating utilization distributions from fitted step-selection functions. Ecosphere 8: e01771 # Dependence of results on step interval
# Step durations (in hours) at which to repeat the SSF analysis.
step_durations <- 3:12
do_run <- TRUE
buffalo_ids <- levels(factor(buffalo_tracks$id))# c("Cilla", "Gabs", "Mvubu")# levels(factor(buffalo_tracks$id))
if(do_run) {
# For every animal and every step duration: resample the track, build the
# random-steps data set, and refit the SSF. Result is a nested list
# (animals -> step durations) of coefficient vectors and 95% confidence
# intervals.
step_interval_simulation_amt <- lapply(buffalo_ids, function(animal) {
lapply(step_durations, function(step_duration) {
# Resample to the target duration (15 min tolerance), keep bursts of at
# least 3 locations, add 200 control steps, attach covariates, and drop
# incomplete rows and zero-length steps (log(sl_) is a predictor below).
ssf_animal <- filter(buffalo_tracks, id == animal) %>%
track_resample(hours(step_duration), tolerance = minutes(15)) %>%
filter_min_n_burst(3) %>%
steps_by_burst() %>%
random_steps(n = 200) %>%
extract_covariates(buffalo_env) %>%
filter(complete.cases(.)) %>% filter(sl_ > 0)
m_1 <- clogit(case_ ~ cos(ta_) + sl_ + log(sl_) + elev +
mean_NDVI + strata(step_id_), data = ssf_animal)
list("coef" = coef(m_1), "confint" = confint(m_1))
})
})
}
## Steps with length 0 are present. This will lead to an error when fitting a gamma distribution. 0 step lengths are replaced with the smallest non zero step length, which is: 2.25240718247496
## Steps with length 0 are present. This will lead to an error when fitting a gamma distribution. 0 step lengths are replaced with the smallest non zero step length, which is: 1.12625675366255
do_run <- TRUE
if (do_run) {
  # One figure per animal: SSF coefficient estimates (with 95% CIs) as a
  # function of the step duration used to resample the track.
  for (j in seq_along(step_interval_simulation_amt)) {
    model_list <- step_interval_simulation_amt[[j]]
    # The model list was built by iterating over buffalo_ids, so the panel
    # label must come from the same vector. The original indexed
    # names(split(buffalo_utm)), whose ordering is not guaranteed to match
    # levels(factor(buffalo_tracks$id)) and could mislabel the figures.
    name <- buffalo_ids[j]
    # Rows = coefficients, columns = step durations.
    coefs <- sapply(model_list, function(x) x$coef)
    ci_lower <- sapply(model_list, function(x) x$confint[, 1])
    ci_upper <- sapply(model_list, function(x) x$confint[, 2])
    par(mfrow = c(3, 2), mar = c(4, 5, 1, 1), oma = c(1, 1, 5, 1))
    for (i in rownames(coefs)) {
      # Empty plot sized to hold the estimate and both CI bounds (and 0).
      plot(c(0, 0), xlim = range(step_durations),
           ylim = range(c(0, ci_lower[i, ],
                          coefs[i, ],
                          ci_upper[i, ])), type = "n",
           xlab = "Step duration (hr)",
           ylab = i)
      abline(h = 0, lty = 2, col = "red")  # reference line: no effect
      lines(step_durations, ci_lower[i, ], lty = 3)
      lines(step_durations, ci_upper[i, ], lty = 3)
      lines(step_durations, coefs[i, ])
    }
    mtext(name, outer = TRUE)  # animal id as overall figure title
  }
}